In [7]:
import csv, os, time, datetime
import lib.django.djff.models as dm
all_manual_tags_query = dm.ManualTag.objects.all()
researchers = list(dm.Researcher.objects.all())
experiments = list(dm.Experiment.objects.exclude(pk=11))
species = list(dm.Species.objects.all())
query_list = [all_manual_tags_query]
After running the cell above to set things up and load all of the tags into the all_manual_tags object, we need to filter them to select only the ones that you want to export.
Each one of these queries builds on the previous query. At each stage in the examples below, we are taking the last query on the query_list, filtering it, and appending the new query to the end of the query_list.
We start by excluding all tags generated by a researcher known to produce low-quality tags:
In [2]:
for researcher in researchers:
if 'Wil' in researcher.name:
print 'ID:{}, name:{}'.format(researcher.id, researcher.name)
In [3]:
print 'pre-filter:', query_list[-1].count()
query_list.append(query_list[-1].exclude(researcher_id=2))
print 'post-filter:', query_list[-1].count()
Next, let's only keep the pleco tags:
In [9]:
print 'pre-filter:', query_list[-1].count()
# For each tag, only keep it if the shortname of the species of the experiment of the image that
# this tag is attached to is "HP":
query_list.append(query_list[-1].filter(image__xp__species__shortname='HP'))
print 'post-filter:', query_list[-1].count()
print 'pre-filter:', query_list[-1].count()
# For each tag, only keep it if the name of the experiment of the image that
# this tag is attached to contains "Pleco":
query_list.append(query_list[-1].filter(image__xp__name__contains='Pleco'))
print 'post-filter:', query_list[-1].count()
Now let's keep only the tags from January of 2015:
In [10]:
print 'pre-filter:', query_list[-1].count()
query_list.append(query_list[-1].filter(timestamp__month=2, timestamp__year=2016))
print 'post-filter:', query_list[-1].count()
Finally, we name the latest query export_these_tags and turn it into a Python list for further processing:
In [11]:
export_these_tags = list(query_list[-1])
In [8]:
export_these_tags_test = list(
query_list[0].exclude(
researcher_id=2).filter(
image__xp__species__shortname='HP').filter(
image__xp__name__contains='Pleco').filter(
timestamp__month=1, timestamp__year=2015)
)
print 'Same length:', len(export_these_tags) == len(export_these_tags_test)
print 'Same tags:', [tag.id for tag in export_these_tags] == [tag.id for tag in export_these_tags_test]
Either way, you want to end up with an export_these_tags list (or query) at the end of the filtering stage.
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# export_these_tags = query_list[-1]
In [12]:
# set this to suit yourself - leaving it blank is perfectly valid
custom_prefix = ''
# These are probably comfortable defaults
export_directory = '/mnt/server_storage/exported_data'
timestamp_export_file = True
#
# First, we build the full pathname of the exported file:
#
if timestamp_export_file:
prefix = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S-')
else:
prefix = ''
export_filename = custom_prefix + prefix + 'fishface_exported_data.csv'
export_path = os.path.join(export_directory, export_filename)
cjr_start_cache = dict()
#
# Then we write the file from `export_these_tags`:
#
with open(export_path, 'wb') as csvfile:
writer = csv.writer(csvfile, dialect=csv.excel)
# Write header row
writer.writerow([
'TagId',
'TaggerID',
'Species',
'XPName',
'ImageTimestamp',
'ImageVoltage',
'TagTimestamp',
'TagStartX',
'TagStartY',
'Angle',
'CJR_ID',
'SecondsSinceCJRFirstCapture',
])
species = export_these_tags[0].image.xp.species.shortname
for tag in export_these_tags:
cjr_id = tag.image.cjr_id
if cjr_id not in cjr_start_cache:
images_timestamps_for_cjr = [im.capture_timestamp for im in
dm.CaptureJobRecord.objects.get(pk=cjr_id).image_set.all()]
cjr_start_cache[cjr_id] = sorted(images_timestamps_for_cjr)[0]
delta = (tag.image.capture_timestamp - cjr_start_cache[cjr_id])
seconds_since_cjr_start = delta.seconds + (float(delta.microseconds) / 1000000)
x, y = tag.int_start
tag_image = tag.image
writer.writerow([
tag.id,
tag.researcher_id,
species,
tag_image.xp.name,
tag_image.capture_timestamp.strftime('%Y-%m-%d %H:%M:%S'),
tag_image.voltage,
tag.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
x,
y,
tag.degrees,
cjr_id,
seconds_since_cjr_start,
])
In [ ]: